Submitting ITS2 samples to Symportal
Prior to submission
# *note: most of this was written by Dr. Carly D. Kenkel
# in Terminal home directory:
# following instructions of installing BBtools from https://jgi.doe.gov/data-and-tools/bbtools/bb-tools-user-guide/installation-guide/
# 1. download BBMap package, sftp to installation directory
# 2. untar:
tar -xvzf BBMap_(version).tar.gz
# 3. test package:
cd bbmap
~/bin/bbmap/stats.sh in=~/bin/bbmap/resources/phix174_ill.ref.fa.gz
# my adaptors, which I saved as "adaptors.fasta"
# >forward
# AATGATACGGCGACCAC
# >forwardrc
# GTGGTCGCCGTATCATT
# >reverse
# CAAGCAGAAGACGGCATAC
# >reverserc
# GTATGCCGTCTTCTGCTTG
# primers for ITS2:
# >forward
# GTGAATTGCAGAACTCCGTG
# >reverse
# CCTCCGCTTACTTATATGCTT
# making a sample list based on the first phrase before the underscore in the .fastq name
ls *R1_001.fastq | cut -d '_' -f 1 > samples.list
# cuts off the extra words in the .fastq files
for file in $(cat samples.list); do mv ${file}_*R1*.fastq ${file}_R1.fastq; mv ${file}_*R2*.fastq ${file}_R2.fastq; done
# gets rid of reads that still have the adaptor sequence
for file in $(cat samples.list); do ~/bin/bbmap/bbduk.sh in1=${file}_R1.fastq in2=${file}_R2.fastq ref=adaptors.fasta out1=${file}_R1_NoIll.fastq out2=${file}_R2_NoIll.fastq; done &>bbduk_NoIll.log
# only keeping reads that start with the primer
for file in $(cat samples.list); do ~/bin/bbmap/bbduk.sh in1=${file}_R1_NoIll.fastq in2=${file}_R2_NoIll.fastq k=15 restrictleft=21 literal=GTGAATTGCAGAACTCCGTG,CCTCCGCTTACTTATATGCTT outm1=${file}_R1_NoIll_ITS.fastq outu1=${file}_R1_check.fastq outm2=${file}_R2_NoIll_ITS.fastq outu2=${file}_R2_check.fastq; done &>bbduk_ITS.log
# higher k = more reads removed, but can't surpass k=20 or 21
# renamed them to the shorter version again
for file in $(cat samples.list); do mv ${file}_*R1*.fastq ${file}_R1.fastq; mv ${file}_*R2*.fastq ${file}_R2.fastq; done Then transferred all “R1.fastq” & “R2.fastq” files to the folder to be submitted to SymPortal
Information on Symportal output documents here
Formatting notes:
## Loading required package: lattice
## Loading required package: plyr
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
## The following object is masked from 'package:plyr':
##
## mutate
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.1.2
## This is vegan 2.5-7
counts <- read.csv('symportal_profile_counts.csv',header=TRUE,row.names=1,check.names=FALSE)
plot(rowSums(counts)) #3 that are 0, removing them now: (513, 87, 76)
counts.no0 <- counts[1:93,]Ran once then saved to re-read in later
# import dataframe holding sample information
samdf<-read.csv("mrits_sampledata copy.csv")
head(samdf)## Sample Site X.DNA...ng.ul. island direction zone site
## 1 101 Moorea NW Inner 304 Moorea NW Backreef MNW
## 2 113 Moorea NW Outer 379 Moorea NW Forereef MNW
## 3 115 Moorea NW Outer 407 Moorea NW Forereef MNW
## 4 116 Moorea NW Outer 670 Moorea NW Forereef MNW
## 5 117 Moorea NW Outer 755 Moorea NW Forereef MNW
## 6 130 Moorea NW Outer 200 Moorea NW Forereef MNW
## site_zone DNAcon zoox
## 1 MNW-B 304.2 y
## 2 MNW-F 379.0 y
## 3 MNW-F 407.0 y
## 4 MNW-F 670.0 y
## 5 MNW-F 755.0 y
## 6 MNW-F 200.0 y
rownames(samdf) <- samdf$Sample
samdf$sample_full <- paste(samdf$site_zone,samdf$Sample)
# import taxa info
taxa <- read.csv("symportal_taxa.csv",header=TRUE)
rownames(taxa) <- as.factor(taxa$ITS2_type_profile)
mtaxa <- as.matrix(taxa)
# import counts (absolute abundance from its2 type profiles)
mcounts <- as.matrix(counts.no0)
# Construct phyloseq object
ps <- phyloseq(otu_table(mcounts, taxa_are_rows=FALSE),
sample_data(samdf),
tax_table(mtaxa))
ps## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9 taxa and 93 samples ]
## sample_data() Sample Data: [ 93 samples by 11 sample variables ]
## tax_table() Taxonomy Table: [ 9 taxa by 7 taxonomic ranks ]
#saveRDS(ps,file="ps.its2.RDS")Ran once then saved to re-read in later
counts.all <- read.csv("symportal_allcounts.csv",header=T,row.names=1)
plot(rowSums(counts.all)) #3 that are 0, removing them now: (513, 87, 76)
counts.all.no0 <- counts.all[1:93,]
# # import taxa info - haven't done this with the full suite of types yet
# taxa <- read.csv("symportal_taxa.csv",header=TRUE)
# rownames(taxa) <- as.factor(taxa$ITS2_type_profile)
# mtaxa <- as.matrix(taxa)
# import counts (absolute abundance from its2 type profiles)
mcounts.all <- as.matrix(counts.all.no0)
# Construct phyloseq object
ps.all <- phyloseq(otu_table(mcounts.all, taxa_are_rows=FALSE),
sample_data(samdf))
ps.all## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 157 taxa and 93 samples ]
## sample_data() Sample Data: [ 93 samples by 11 sample variables ]
#saveRDS(ps.all,"ps.all.its2.RDS")#ps <- readRDS("ps.its2.RDS")
#ps.all <- readRDS("ps.all.its2.RDS")# first look at data
plot_bar(ps, "Sample", fill="ITS2_type_profile")## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
plot_bar(ps,"ITS2_type_profile", fill="ITS2_type_profile",facet_grid=~site_zone)## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
#all samples
#plot_bar(ps,"sample_full")
ps.rel <- transform_sample_counts(ps, function(x) x / sum(x))
plot_bar(ps.rel,"ITS2_type_profile", fill="ITS2_type_profile",facet_grid=~site_zone)## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
gg.bar <- plot_bar(ps.rel,"sample_full",fill="ITS2_type_profile")+
geom_bar(stat="identity")+#put color back here in aes
#scale_color_manual(values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
scale_fill_manual(name="ITS2 type profiles", values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
theme_cowplot()+
#theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_x_discrete(breaks=c("MNW-B 70","MNW-F 172","MSE-B 307","MSE-F 614","TNW-B 526","TNW-F 405"),labels=c("MNW-B","MNW-F","MSE-B","MSE-F","TNW-B","TNW-F"))+
xlab("Site-reef zone")+
geom_vline(xintercept=14.5)+
geom_vline(xintercept=30.5)+
geom_vline(xintercept=46.5)+
geom_vline(xintercept=62.5)+
geom_vline(xintercept=77.5)+
geom_vline(xintercept=93.5)## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
gg.bar#counted 14 samples for MNW-B
#16 for MNW-F
#16 MSE-B
#16 MSE-F
#15 TNW-B
#16 TNW-F
#ggsave(gg.bar,file="sym.barplot.pdf",h=4,w=11)
#want to re-order by similar samplesps.sz <- merge_samples(ps, "site_zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.rel.sz <- transform_sample_counts(ps.sz, function(x) x / sum(x))
plot_bar(ps.rel.sz, fill="ITS2_type_profile")## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
ps.mnw <- subset_samples(ps, site=="MNW")
ps.mnw.z <- merge_samples(ps.mnw, "zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.mnw.z.rel <- transform_sample_counts(ps.mnw.z, function(x) x / sum(x))
ps.mse <- subset_samples(ps, site=="MSE")
ps.mse.z <- merge_samples(ps.mse, "zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.mse.z.rel <- transform_sample_counts(ps.mse.z, function(x) x / sum(x))
ps.tnw <- subset_samples(ps, site=="TNW")
ps.tnw.z <- merge_samples(ps.tnw, "zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.tnw.z.rel <- transform_sample_counts(ps.tnw.z, function(x) x / sum(x))
ps.mnw.z <- merge_samples(ps.mnw, "zone")## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion
ps.mnw.z.rel <- transform_sample_counts(ps.mnw.z, function(x) x / sum(x))
bar.mnw <- plot_bar(ps.mnw.z.rel, fill="ITS2_type_profile")+
theme_cowplot()+
scale_fill_manual(name="ITS2 type profile",values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
ylab("Relative abundance")+
xlab("Reef zone")+
scale_x_discrete(labels=c("BR","FR"))+
ggtitle("Mo'orea NW")## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
bar.mnwbar.mse <- plot_bar(ps.mse.z.rel, fill="ITS2_type_profile")+
theme_cowplot()+
scale_fill_manual(name="ITS2 type profile",values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
ylab("Relative abundance")+
scale_x_discrete(labels=c("BR","FR"))+
xlab("Reef zone")+
ggtitle("Mo'orea SE")## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
bar.msebar.tnw <- plot_bar(ps.tnw.z.rel, fill="ITS2_type_profile")+
theme_cowplot()+
scale_fill_manual(name="ITS2 type profile",values=c("#8E0152","#DE77AE","#FDE0EF","darkgreen","greenyellow","#25AB82","darkolivegreen3","green3","#E6F5D0"))+
xlab("Reef zone")+
ylab("Relative abundance")+
scale_x_discrete(labels=c("BR","FR"))+
ggtitle("Tahiti NW")## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
bar.tnwNot super interesting
ps.clade <- tax_glom(ps, "Clade")
ps.clade.rel <- transform_sample_counts(ps.clade, function(x) x / sum(x))
plot_bar(ps.clade.rel, "sample_full", fill="Clade")+
geom_bar(aes(color=Clade, fill=Clade), stat="identity", position="stack")## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
ps.ord <- ordinate(ps,"PCoA",distance="bray")
plot_ordination(ps, ps.ord, color ="site", shape="zone")+
geom_point(alpha=0.5)+
scale_color_manual(values=c("darkslategray3","darkslategray4","#000004"))+
scale_shape_manual(values=c(16,15))+
stat_ellipse(aes(linetype=zone))+
theme_cowplot()## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
#ggsave(filename="pcoa.types.site.pdf")#just cladocopium
# ps.c <- subset_taxa(ps,Clade=="C")
# ps.c.no0 <- subset_samples(ps.c,sample_sums(ps.c)!=0)
#
# ps.ord.c <- ordinate(ps.c.no0,"PCoA",distance="bray")
# plot_ordination(ps.c.no0, ps.ord.c, color ="site")+
# geom_point(alpha=0.5)
# #doesn't really change between C or C+A
ps.mnw <- subset_samples(ps,site=="MNW")
gg.pc.mnw.types <- plot_ordination(ps.mnw, ordinate(ps.mnw,"PCoA",distance="bray"), color ="zone",shape="zone")+
geom_point()+
stat_ellipse()+
scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
theme_cowplot()+
xlab("Axis 1 (61.4%)")+
ylab("Axis 2 (12.2%)")+
#theme(legend.position="none")+
ggtitle("Mo'orea NW")
gg.pc.mnw.types## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ps.mse <- subset_samples(ps,site=="MSE")
gg.pc.mse.types <- plot_ordination(ps.mse, ordinate(ps.mse,"PCoA",distance="bray"), color ="zone",shape="zone")+
geom_point()+
stat_ellipse()+
scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
theme_cowplot()+
ggtitle("Mo'orea SE")+
xlab("Axis 1 (52.4%)")+
ylab("Axis 2 (23.5%)")
gg.pc.mse.types## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ps.tnw <- subset_samples(ps,site=="TNW")
gg.pc.tnw.types <- plot_ordination(ps.tnw, ordinate(ps.tnw,"PCoA",distance="bray"), color ="zone",shape="zone")+
geom_point()+
stat_ellipse()+
scale_shape_manual(values=c(16,15),labels=c("BR","FR"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("BR","FR"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
theme_cowplot()+
ggtitle("Tahiti NW")+
xlab("Axis 1 (41.1%)")+
ylab("Axis 2 (21.5%)")
gg.pc.tnw.typesReading in coordinate files given by SymPortal: (done on full suite of sequences, not the type profiles)
pcoa.c <- read.csv("symportal_bray_pcoa_cladec.csv")
#pcoa.c <- read.csv("symportal_bray_pcoa_cladec_sqrt.csv")
pcoa.c$Sample <- pcoa.c$sample
pcoa.samdata.c <- merge(pcoa.c,samdf,by="Sample")#quick raw plot
ggplot(pcoa.samdata.c,aes(x=PC1,y=PC2,color=site))+
geom_point()ggplot(pcoa.samdata.c,aes(x=PC1,y=PC2,color=site_zone,shape=site,linetype=zone))+
geom_point()+
stat_ellipse()+
#scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
#scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))ggplot(pcoa.samdata.c,aes(x=PC1,y=PC2,color=zone,shape=zone))+
geom_point()+
stat_ellipse()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
pcoa.mnw <- subset(pcoa.samdata.c,site=="MNW")
gg.pc.mnw <- ggplot(pcoa.mnw,aes(x=PC1,y=PC2,color=zone,shape=zone))+
geom_point()+
stat_ellipse()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
theme_cowplot()+
theme(legend.position="none")+
ggtitle("Mo'orea NW")+
xlab(" ")
gg.pc.mnwpcoa.mse <- subset(pcoa.samdata.c,site=="MSE")
gg.pc.mse <- ggplot(pcoa.mse,aes(x=PC1,y=PC2,color=zone,shape=zone))+
geom_point()+
stat_ellipse()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
theme_cowplot()+
ggtitle("Mo'orea SE")+
theme(legend.position="none")+
ylab(" ")
gg.pc.msepcoa.tnw <- subset(pcoa.samdata.c,site=="TNW")
gg.pc.tnw <- ggplot(pcoa.tnw,aes(x=PC1,y=PC2,color=zone,shape=zone))+
geom_point()+
stat_ellipse()+
scale_shape_manual(values=c(16,15),labels=c("Back reef","Fore reef"))+
scale_color_manual(values=c("#ED7953FF","#8405A7FF"),labels=c("Back reef","Fore reef"))+
guides(color=guide_legend(title="Reef zone"),shape=guide_legend(title="Reef zone"))+
theme_cowplot()+
ggtitle("Tahiti NW")+
ylab(" ")+
xlab(" ")
gg.pc.tnwlibrary(ggpubr)gg.pca <- ggarrange(gg.pc.mnw.types,gg.pc.mse.types,gg.pc.tnw.types,labels=c("(a)","(b)","(c)"),nrow=1,common.legend=T,legend="right")## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
gg.pcagg.bar <- ggarrange(bar.mnw,bar.mse,bar.tnw,labels=c("(d)","(e)","(f)"),nrow=1,common.legend=TRUE,legend="right")
gg.barggarrange(gg.pca,gg.bar,nrow=2)#ggsave(file="pca.bar.pdf",height=5.5,width=9.5)Thank you to Ryan Eckhart for parts of this: Github here
#BiocManager::install("edgeR", update = FALSE)
library(edgeR)## Warning: package 'edgeR' was built under R version 4.1.1
## Loading required package: limma
seqs.types <- as.data.frame(ps@otu_table)
seqs.types.t <- t(seqs.types)
its2SeqList = DGEList(counts = seqs.types.t)
head(its2SeqList$samples)## group lib.size norm.factors
## 308 1 2306 1
## 300 1 4618 1
## 97 1 2589 1
## 172 1 5651 1
## 627 1 7381 1
## 96 1 2750 1
its2SeqNorm = calcNormFactors(its2SeqList, method = "TMM")## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
head(its2SeqNorm$samples)## group lib.size norm.factors
## 308 1 2306 1.009554
## 300 1 4618 1.009554
## 97 1 2589 1.009554
## 172 1 5651 1.009554
## 627 1 7381 1.009554
## 96 1 2750 1.009554
its2TMM = t(cpm(its2SeqNorm, normalized.lib.sizes = TRUE))
#phyloseq
row.names(samdf) <- samdf$Sample
ps.norm <- phyloseq(otu_table(its2TMM, taxa_are_rows=FALSE),
sample_data(samdf),
tax_table(mtaxa))
ps.norm## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 9 taxa and 93 samples ]
## sample_data() Sample Data: [ 93 samples by 11 sample variables ]
## tax_table() Taxonomy Table: [ 9 taxa by 7 taxonomic ranks ]
ps.norm.ord <- ordinate(ps.norm,"PCoA",distance="bray")
plot_ordination(ps.norm, ps.norm.ord, color ="site")+
geom_point(alpha=0.5)ps.norm.mnw <- subset_samples(ps.norm,site=="MNW")
plot_ordination(ps.norm.mnw, ordinate(ps.norm.mnw,"PCoA","bray"), color ="zone")+
geom_point(alpha=0.5)+
stat_ellipse()## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ps.norm.mse <- subset_samples(ps.norm,site=="MSE")
plot_ordination(ps.norm.mse, ordinate(ps.norm.mse,"PCoA","bray"), color ="zone")+
geom_point(alpha=0.5)+
stat_ellipse()## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ps.norm.tnw <- subset_samples(ps.norm,site=="TNW")
plot_ordination(ps.norm.tnw, ordinate(ps.norm.tnw,"PCoA","bray"), color ="zone")+
geom_point(alpha=0.5)+
stat_ellipse()#BiocManager::install("edgeR", update = FALSE)
library(edgeR)
seqs.all <- as.data.frame(ps.all@otu_table)
seqs.all.t <- t(seqs.all)
its2SeqList.all = DGEList(counts = seqs.all.t)
head(its2SeqList.all$samples)
its2SeqNorm.all = calcNormFactors(its2SeqList.all, method = "TMM")
head(its2SeqNorm.all$samples)
its2TMM.all = t(cpm(its2SeqNorm.all, normalized.lib.sizes = TRUE))
#phyloseq
ps.norm.all <- phyloseq(otu_table(its2TMM.all, taxa_are_rows=FALSE),
sample_data(samdf))
ps.norm.all
ps.norm.all.ord <- ordinate(ps.norm.all,"PCoA",distance="bray")
plot_ordination(ps.norm.all, ps.norm.all.ord, color ="site")+
geom_point(alpha=0.5)+
stat_ellipse()
Fuller data counts from Symportal before type profiles identified
First we purge sequences and transpose the data to work with edgeR
goods = purgeOutliers(its2Seq, count.columns = 4:length(its2Seq), otu.cut = 0.0001, sampleZcut = -5)its2SeqTransposed = t(goods[, 4:length(goods[1, ])])
its2SeqList = DGEList(counts = its2SeqTransposed)
head(its2SeqList$samples)Normalize
library(vegan)
#remotes::install_github("Jtrachsel/funfuns")
library(funfuns)
library(dplyr)## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(edgeR)Raw
seq.ps <- data.frame(ps@otu_table)
samdf.ps <- data.frame(ps@sam_data)
dist.ps <- vegdist(seq.ps)
bet.ps <- betadisper(dist.ps,samdf.ps$site)
#anova(bet.ps) #ns
permutest(bet.ps,pairwise=TRUE,permutations=999)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.1579 0.078968 1.1853 999 0.306
## Residuals 90 5.9961 0.066623
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## MNW MSE TNW
## MNW 0.65000 0.245
## MSE 0.63305 0.138
## TNW 0.26075 0.13554
plot(bet.ps)adonis(seq.ps ~ site/zone, data=samdf.ps, permutations=999) #p<001***##
## Call:
## adonis(formula = seq.ps ~ site/zone, data = samdf.ps, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 6.159 3.07961 9.9353 0.16920 0.001 ***
## site:zone 3 3.277 1.09226 3.5238 0.09001 0.001 ***
## Residuals 87 26.967 0.30997 0.74079
## Total 92 36.403 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise.adonis(seq.ps, factors=samdf.ps$site, permutations=999) #tahiti different from other two## pairs F.Model R2 p.value p.adjusted
## 1 MSE vs MNW 0.8174161 0.01344049 0.445 0.445
## 2 MSE vs TNW 13.6939515 0.18333414 0.001 0.001
## 3 MNW vs TNW 12.1951261 0.17129159 0.001 0.001
# pairs F.Model R2 p.value p.adjusted
# 1 MSE vs MNW 0.8174161 0.01344049 0.445 0.445
# 2 MSE vs TNW 13.6939515 0.18333414 0.001 0.001
# 3 MNW vs TNW 12.1951261 0.17129159 0.001 0.001Normalized
seq.ps.norm <- data.frame(ps.norm@otu_table)
samdf.ps.norm <- data.frame(ps.norm@sam_data)
dist.ps.norm <- vegdist(seq.ps.norm)
bet.ps.norm <- betadisper(dist.ps.norm,samdf.ps.norm$site)## Warning in betadisper(dist.ps.norm, samdf.ps.norm$site): some squared distances
## are negative and changed to zero
anova(bet.ps.norm) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.6538 0.32689 1.8307 0.1662
## Residuals 90 16.0702 0.17856
plot(bet.ps.norm)adonis(seq.ps.norm ~ site/zone, data=samdf.ps.norm, permutations=999) #p<001***##
## Call:
## adonis(formula = seq.ps.norm ~ site/zone, data = samdf.ps.norm, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 6.880 3.4401 12.0076 0.19694 0.001 ***
## site:zone 3 3.131 1.0437 3.6429 0.08962 0.001 ***
## Residuals 87 24.925 0.2865 0.71344
## Total 92 34.936 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise.adonis(seq.ps.norm, factors=samdf.ps.norm$site, permutations=999) #tahiti different from other two## pairs F.Model R2 p.value p.adjusted
## 1 MSE vs MNW 0.8634241 0.01418626 0.401 0.401
## 2 MSE vs TNW 16.5137674 0.21304302 0.001 0.001
## 3 MNW vs TNW 14.5285152 0.19759022 0.001 0.001
# pairs F.Model R2 p.value p.adjusted
# 1 MSE vs MNW 0.8174161 0.01344049 0.445 0.445
# 2 MSE vs TNW 13.6939515 0.18333414 0.001 0.001
# 3 MNW vs TNW 12.1951261 0.17129159 0.001 0.001Relative abundance
seq.ps.rel <- data.frame(ps.rel@otu_table)
samdf.ps.rel <- data.frame(ps.rel@sam_data)
dist.ps.rel <- vegdist(seq.ps.rel)
bet.ps.rel <- betadisper(dist.ps.rel,samdf.ps.rel$site)## Warning in betadisper(dist.ps.rel, samdf.ps.rel$site): some squared distances
## are negative and changed to zero
anova(bet.ps.rel) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.6456 0.32280 1.8182 0.1682
## Residuals 90 15.9780 0.17753
plot(bet.ps.rel)adonis(seq.ps.rel ~ site/zone, data=samdf.ps.rel, permutations=999) #p<001***##
## Call:
## adonis(formula = seq.ps.rel ~ site/zone, data = samdf.ps.rel, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## site 2 6.856 3.4280 11.9219 0.19590 0.001 ***
## site:zone 3 3.126 1.0421 3.6243 0.08933 0.001 ***
## Residuals 87 25.016 0.2875 0.71477
## Total 92 34.998 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise.adonis(seq.ps.rel, factors=samdf.ps.rel$site, permutations=999) #tahiti different from other two## pairs F.Model R2 p.value p.adjusted
## 1 MSE vs MNW 0.8838874 0.01451759 0.412 0.412
## 2 MSE vs TNW 16.5137674 0.21304302 0.001 0.001
## 3 MNW vs TNW 14.3300043 0.19541802 0.001 0.001
# pairs F.Model R2 p.value p.adjusted
# 1 MSE vs MNW 0.8174161 0.01344049 0.445 0.445
# 2 MSE vs TNW 13.6939515 0.18333414 0.001 0.001
# 3 MNW vs TNW 12.1951261 0.17129159 0.001 0.001ps.mnw <- subset_samples(ps,site=="MNW")
ps.mnw.rel <- subset_samples(ps.rel,site=="MNW")
ps.mnw.norm <- subset_samples(ps.norm,site=="MNW")
ps.mse <- subset_samples(ps,site=="MSE")
ps.mse.rel <- subset_samples(ps.rel,site=="MSE")
ps.mse.norm <- subset_samples(ps.norm,site=="MSE")
ps.tnw <- subset_samples(ps,site=="TNW")
ps.tnw.rel <- subset_samples(ps.rel,site=="TNW")
ps.tnw.norm <- subset_samples(ps.norm,site=="TNW")Mo’orea NW
seq.ps.mnw <- data.frame(otu_table(ps.mnw))
samdf.ps.mnw <- data.frame(sample_data(ps.mnw))
row.names(seq.ps.mnw)==row.names(samdf.ps.mnw)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.ps.mnw <- vegdist(seq.ps.mnw)
bet.ps.mnw <- betadisper(dist.ps.mnw,samdf.ps.mnw$zone)
anova(bet.ps.mnw) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.2608 0.26076 1.9915 0.1692
## Residuals 28 3.6661 0.13093
plot(bet.ps.mnw)adonis(seq.ps.mnw ~ zone, data=samdf.ps.mnw, permutations=999) #p<01**##
## Call:
## adonis(formula = seq.ps.mnw ~ zone, data = samdf.ps.mnw, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 1.9833 1.98334 7.6393 0.21435 0.002 **
## Residuals 28 7.2695 0.25962 0.78565
## Total 29 9.2528 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Normalized
seq.ps.mnw.norm <- data.frame(otu_table(ps.mnw.norm))
samdf.ps.mnw.norm <- data.frame(sample_data(ps.mnw.norm))
row.names(seq.ps.mnw.norm)==row.names(samdf.ps.mnw.norm)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
dist.ps.mnw.norm <- vegdist(seq.ps.mnw.norm)
bet.ps.mnw.norm <- betadisper(dist.ps.mnw.norm,samdf.ps.mnw.norm$zone)
anova(bet.ps.mnw.norm) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.5791 0.57914 2.8779 0.1009
## Residuals 28 5.6346 0.20124
plot(bet.ps.mnw.norm)adonis(seq.ps.mnw.norm ~ zone, data=samdf.ps.mnw.norm, permutations=999) #p<01**##
## Call:
## adonis(formula = seq.ps.mnw.norm ~ zone, data = samdf.ps.mnw.norm, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 2.0483 2.0483 8.9327 0.24186 0.002 **
## Residuals 28 6.4204 0.2293 0.75814
## Total 29 8.4687 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Rel abundance
seq.ps.mnw.rel <- data.frame(otu_table(ps.mnw.rel))
dist.ps.mnw <- vegdist(seq.ps.mnw.rel)
bet.ps.mnw <- betadisper(dist.ps.mnw,samdf.ps.mnw$zone)
anova(bet.ps.mnw) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.5375 0.53753 2.6532 0.1145
## Residuals 28 5.6727 0.20259
permutest(bet.ps.mnw, permutations = 999,pairwise=F)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.5375 0.53753 2.6532 999 0.113
## Residuals 28 5.6727 0.20259
plot(bet.ps.mnw) #nsadonis(seq.ps.mnw.rel ~ zone, data=samdf.ps.mnw, permutations=999) #sig p < 0.01##
## Call:
## adonis(formula = seq.ps.mnw.rel ~ zone, data = samdf.ps.mnw, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 2.0437 2.04367 8.7878 0.23888 0.001 ***
## Residuals 28 6.5116 0.23256 0.76112
## Total 29 8.5553 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Mo’orea SE
seq.ps.mse <- data.frame(otu_table(ps.mse))
samdf.ps.mse <- data.frame(sample_data(ps.mse))
row.names(seq.ps.mse)==row.names(samdf.ps.mse)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
dist.ps.mse <- vegdist(seq.ps.mse)
bet.ps.mse <- betadisper(dist.ps.mse,samdf.ps.mse$zone)
anova(bet.ps.mse) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.42925 0.42925 6.6824 0.01484 *
## Residuals 30 1.92711 0.06424
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.ps.mse)adonis(seq.ps.mse ~ zone, data=samdf.ps.mse, permutations=999) #p<01**##
## Call:
## adonis(formula = seq.ps.mse ~ zone, data = samdf.ps.mse, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.9283 0.92830 3.2027 0.09646 0.029 *
## Residuals 30 8.6954 0.28985 0.90354
## Total 31 9.6237 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Normalized
seq.ps.mse.norm <- data.frame(otu_table(ps.mse.norm))
samdf.ps.mse.norm <- data.frame(sample_data(ps.mse.norm))
row.names(seq.ps.mse.norm)==row.names(samdf.ps.mse.norm)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE
dist.ps.mse.norm <- vegdist(seq.ps.mse.norm)
bet.ps.mse.norm <- betadisper(dist.ps.mse.norm,samdf.ps.mse.norm$zone)
anova(bet.ps.mse.norm) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 1.1009 1.10094 8.0701 0.00801 **
## Residuals 30 4.0927 0.13642
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.ps.mse.norm)adonis(seq.ps.mse.norm ~ zone, data=samdf.ps.mse.norm, permutations=999) #p<01**##
## Call:
## adonis(formula = seq.ps.mse.norm ~ zone, data = samdf.ps.mse.norm, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.7813 0.78125 2.9528 0.08961 0.065 .
## Residuals 30 7.9375 0.26458 0.91039
## Total 31 8.7188 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Rel abundance
seq.ps.mse.rel <- data.frame(otu_table(ps.mse.rel))
dist.ps.mse <- vegdist(seq.ps.mse.rel)
bet.ps.mse <- betadisper(dist.ps.mse,samdf.ps.mse$zone)
anova(bet.ps.mse) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 1.1009 1.10094 8.0701 0.00801 **
## Residuals 30 4.0927 0.13642
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
permutest(bet.ps.mse, permutations = 999,pairwise=F)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 1.1009 1.10094 8.0701 999 0.008 **
## Residuals 30 4.0927 0.13642
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(bet.ps.mse) #nsadonis(seq.ps.mse.rel ~ zone, data=samdf.ps.mse, permutations=999) #sig p < 0.01##
## Call:
## adonis(formula = seq.ps.mse.rel ~ zone, data = samdf.ps.mse, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.7813 0.78125 2.9528 0.08961 0.085 .
## Residuals 30 7.9375 0.26458 0.91039
## Total 31 8.7188 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Tahiti NW
seq.ps.tnw <- data.frame(otu_table(ps.tnw))
samdf.ps.tnw <- data.frame(sample_data(ps.tnw))
row.names(seq.ps.tnw)==row.names(samdf.ps.tnw)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE
dist.ps.tnw <- vegdist(seq.ps.tnw)
bet.ps.tnw <- betadisper(dist.ps.tnw,samdf.ps.tnw$zone)
anova(bet.ps.tnw) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.04729 0.047288 0.7298 0.3999
## Residuals 29 1.87902 0.064794
plot(bet.ps.tnw)adonis(seq.ps.tnw ~ zone, data=samdf.ps.tnw, permutations=999) #p<01**##
## Call:
## adonis(formula = seq.ps.tnw ~ zone, data = samdf.ps.tnw, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.3652 0.36515 0.96248 0.03212 0.386
## Residuals 29 11.0021 0.37938 0.96788
## Total 30 11.3673 1.00000
Normalized
seq.ps.tnw.norm <- data.frame(otu_table(ps.tnw.norm))
samdf.ps.tnw.norm <- data.frame(sample_data(ps.tnw.norm))
row.names(seq.ps.tnw.norm)==row.names(samdf.ps.tnw.norm)## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE
dist.ps.tnw.norm <- vegdist(seq.ps.tnw.norm)
bet.ps.tnw.norm <- betadisper(dist.ps.tnw.norm,samdf.ps.tnw.norm$zone)## Warning in betadisper(dist.ps.tnw.norm, samdf.ps.tnw.norm$zone): some squared
## distances are negative and changed to zero
anova(bet.ps.tnw.norm) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.0801 0.080086 0.5664 0.4577
## Residuals 29 4.1001 0.141384
plot(bet.ps.tnw.norm)adonis(seq.ps.tnw.norm ~ zone, data=samdf.ps.tnw.norm, permutations=999) #p<01**##
## Call:
## adonis(formula = seq.ps.tnw.norm ~ zone, data = samdf.ps.tnw.norm, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.3014 0.30144 0.8273 0.02774 0.471
## Residuals 29 10.5666 0.36437 0.97226
## Total 30 10.8681 1.00000
Rel abundance
seq.ps.tnw.rel <- data.frame(otu_table(ps.tnw.rel))
dist.ps.tnw <- vegdist(seq.ps.tnw.rel)
bet.ps.tnw <- betadisper(dist.ps.tnw,samdf.ps.tnw$zone)## Warning in betadisper(dist.ps.tnw, samdf.ps.tnw$zone): some squared distances
## are negative and changed to zero
anova(bet.ps.tnw) #ns## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.0801 0.080086 0.5664 0.4577
## Residuals 29 4.1001 0.141384
permutest(bet.ps.tnw, permutations = 999,pairwise=F)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.0801 0.080086 0.5664 999 0.436
## Residuals 29 4.1001 0.141384
plot(bet.ps.tnw) #nsadonis(seq.ps.tnw.rel ~ zone, data=samdf.ps.tnw, permutations=999) #sig p < 0.01##
## Call:
## adonis(formula = seq.ps.tnw.rel ~ zone, data = samdf.ps.tnw, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## zone 1 0.3014 0.30144 0.8273 0.02774 0.474
## Residuals 29 10.5666 0.36437 0.97226
## Total 30 10.8681 1.00000
All syms
seq.all <- data.frame(otu_table(ps.all))
samdf.ps.all <- data.frame(sample_data(ps.all))
row.names(seq.all)==row.names(samdf.ps.all)
dist.all <- vegdist(seq.all)
bet.mnw.c <- betadisper(dist.all,samdf.ps.all$zone)
anova(bet.mnw.c) #ns
plot(bet.mnw.c) #very much overlap, not sig
adonis(otu.mnw.c ~ site*zone, data=samdf.mnw.c, permutations=999)seq.a <- select(seq.all,contains('A'))
#get rid of column 29 - it's a C
seq.a.clean <- seq.a[,1:28]
seq.c <- select(seq.all,contains('C'))
#get rid of column 1, it's an A
seq.c.clean <- seq.c[,2:129]
seq.ac <- seq.all %>%
select(contains('A')) %>%
select(contains('C'))
#just those two
seq.ac <- select(seq.all,!contains(c('C','A')))
#there's a B1 in there haha
seq.c.clean2 <- seq.c.clean[!rowSums(seq.c.clean)==0,]
ps.all.c <- phyloseq(otu_table(as.matrix(seq.c.clean2), taxa_are_rows=FALSE),
sample_data(samdf))
ps.all.c
seq.a.clean2 <- seq.a.clean[!rowSums(seq.a.clean)==0,]
ps.all.a <- phyloseq(otu_table(as.matrix(seq.a.clean2), taxa_are_rows=FALSE),
sample_data(samdf))
ps.all.a
#stats time - CLADOCOPIUM
dist.c <- vegdist(seq.c.clean2)
samdf.ps.c <- data.frame(sample_data(ps.all.c))
row.names(samdf.ps.c)==row.names(seq.c.clean2)
bet.c <- betadisper(dist.c,samdf.ps.c$site)
anova(bet.c) #ns
plot(bet.c) #very much overlap, not sig
adonis(seq.c.clean2 ~ site/zone, data=samdf.ps.c, permutations=999)
pairwise.adonis(seq.c.clean2, factors = samdf.ps.c$site, permutations = 999)
#Tahiti different from other two
ps.mnw.c <- subset_samples(ps.all.c,site=="MNW")
otu.mnw.c <- data.frame(otu_table(ps.mnw.c))
samdf.mnw.c <- data.frame(sample_data(ps.mnw.c))
row.names(otu.mnw.c)==row.names(samdf.mnw.c)
dist.mnw.c <- vegdist(otu.mnw.c)
bet.mnw.c <- betadisper(dist.mnw.c,samdf.mnw.c$zone)
anova(bet.mnw.c) #ns
plot(bet.mnw.c) #very much overlap, not sig
adonis(otu.mnw.c ~ zone, data=samdf.mnw.c, permutations=999)Just ran once to get the info
find.top.asv <- function(x,num){
require(phyloseq)
require(magrittr)
otu <- otu_table(x)
tax <- tax_table(x)
j1 <- apply(otu,1,sort,index.return=T, decreasing=T) # modifying which.max to return a list of sorted index
j2 <- lapply(j1,'[[',"ix") # select for index
l <- data.frame(unique(tax@.Data[unlist(j2),]))
m <- data.frame(otu@.Data[,unique(unlist(j2))])
n <- apply(m,1,sort,index.return=T, decreasing=T) %>%
lapply('[[',"ix") %>% # Extract index
lapply(head,n=num) # This to returns the top x tax
p <- list()
for(i in 1:length(n)){
p[[i]]<- colnames(m)[n[[i]]]
}
m$taxa <- p
return(m)
}
top.asvs <- find.top.asv(ps,1)
top.asvs$taxa <- as.character(top.asvs$taxa)
#write.csv(top.asvs,file="top_sym.csv",row.names=TRUE)table <- read.csv("silly_its_table.csv",header=T)
heatmap(as.matrix(table[,2:11]), labRow=table[,1],rowv=NA)## Warning in plot.window(...): "rowv" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "rowv" is not a graphical parameter
## Warning in title(...): "rowv" is not a graphical parameter